**** WHICH OF THE TAX MULTIPLIERS MAKE IT MOST ROUND?
clear all 
capture log close

global data "/Users/ceweber/Dropbox/JPAM_TIV_code/original_data/"
global dta "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_files/"
global dataclean "/Users/ceweber/Dropbox/JPAM_TIV_code/dta_output/"
global output "/Users/ceweber/Dropbox/JPAM_TIV_code/output/"

set more off
cd "${data}"

////////////////////////////
//////CLEAN THE DATA //////
////////////////////////////

use dispensing.dta
cd "${dataclean}"

sort location 

/*
gen group = group(location)
keep if group<=12
drop group 
*/

* Preliminaries
drop if deleted == 1
drop if refunded == 1

* generate timing variables
gen date=dofC(sessiontime)
format date %d
gen monthly=mofd(date)
gen date2=date+1
gen weekly = wofd(date2)
drop date2

* other cleaning based on data entry errors
egen pricemode=mode(price), by(inventoryid usableweight monthly)
rename pricemode pricemode_month
replace price=pricemode_month if abs(price/pricemode_month-10)<=.1
replace price=pricemode_month if  abs(price/pricemode_month-100)<=1
replace price=pricemode_month if  abs(price/pricemode_month-.1)<=.01
replace price=pricemode_month if  abs(price/pricemode_month-.01)<=.001

egen pricemode=mode(price), by(inventoryid usableweight weekly)
replace price=pricemode if abs(price/pricemode-10)<=.1
replace price=pricemode if  abs(price/pricemode-100)<=1
replace price=pricemode if  abs(price/pricemode-.1)<=.01
replace price=pricemode if  abs(price/pricemode-.01)<=.001
rename pricemode pricemode_weekly

gen TaxChange=0
replace TaxChange=1 if monthly>=666

egen pricemode=mode(price), by(inventoryid usableweight TaxChange)
replace price=pricemode if abs(price/pricemode-10)<=.1
replace price=pricemode if  abs(price/pricemode-100)<=1
replace price=pricemode if  abs(price/pricemode-.1)<=.01
replace price=pricemode if  abs(price/pricemode-.01)<=.001
drop pricemode
drop pricemode_month
rename pricemode_weekly pricemode

* drop insane prices
drop if price <= 0.01
drop if price >= 500

* for development
save dispensing_mode_cleaning, replace


//////////////////////////////
//COLLAPSE THE DATA//////////
////////////////////////////

use dispensing_mode_cleaning, replace

	*figure out the modal weight for this inventoryid, and only use obs that are that weight
	*this only applies to flower, for other types, we keep everything (because we don't have weight)
	
	egen modeweight = mode(usableweight), minmode by(inventoryid location)	
	drop if usableweight != modeweight&inventorytype==28

	*we want modal price 
		//daily
		egen modeprice = mode(price), max by(location inventoryid date)
		egen meanprice = mean(price), by(location inventoryid date)

		collapse (mean) meanprice modeprice usableweight inventorytype weekly, by(location inventoryid date) fast

		sort inventoryid date
		bysort inventoryid: replace modeprice = modeprice[_n-1] if modeprice[_n-1]==modeprice[_n+1]&modeprice==.
		
		gen price = modeprice 
		replace price = meanprice if modeprice==.
	
		save daily_mode_collapsed.dta, replace

				//quick stop off to save a small dataset for each location 
					egen group = group(location)
					sum group
					local max = r(max)
					forvalues g = 1(1)`max'{
						preserve
						
						keep if group==`g'
						sum location 
						local name = r(mean)
						
						save daily_mode_collapsed_`name'.dta, replace
						restore
					}
	
	
		use daily_mode_collapsed.dta, clear
		drop modeprice meanprice
		//now weekly
		egen modeprice = mode(price), max by(location inventoryid weekly)
		egen meanprice = mean(price), by(location inventoryid weekly)

		collapse (mean) meanprice modeprice usableweight inventorytype, by(location inventoryid weekly) fast

				sort inventoryid weekly
				bysort inventoryid: replace modeprice = modeprice[_n-1] if modeprice[_n-1]==modeprice[_n+1]&modeprice==.
		
				gen price = modeprice 
				replace price = meanprice if modeprice==.
				
				gen num_trans = 1
 

	* we're going to need a single panel identifier
	egen panel = group(location weekly)

	order location weekly panel price, first

save collapsed_price_trans_counts_kendall, replace


////////////////////////////////////////////////////////////////
//////FIGURE OUT THE BEST ROUNDER FOR EACH WEEK INVENTORYLOT//////
////////////////////////////////////////////////////////////////


use collapsed_price_trans_counts_kendall, clear

* need to join in the tax rates stuff
rename location locid

* get the tax locality in there
cd "$dta"
merge m:1 locid using locations-to-tax-locality, keep(match master)
drop _merge

* now get the rates
gen quarter = qofd(dofw(weekly))

merge m:1 locality_code quarter using allrates, keep(match master)
drop _merge
cd "$dataclean"

* new state regime starts in week 2885
gen excise_tax = 1.25
replace excise_tax = 1.37 if weekly >= 2886

gen double tax_mult_none = 1.0
gen double tax_mult_sales_only = combined_rate + 1.0
gen double tax_mult_excise_only = excise_tax
gen double tax_mult_both_add = round(combined_rate + excise_tax, .001)
gen double tax_mult_both_mult = excise_tax * (1 + combined_rate)

drop excise_tax combined_rate local_rate state_rate

***
* Identify top two ratios by week

mata
	// Define function that takes an array of prices and num_trans, as well as a ratio to test, and returns the percent round
	// data is expected to be in form [price, num_trans]
	scalar PercentRound(data, ratio, level) {
		// get the new prices implied by the ratio
		new_price = data[., 1] :* ratio
		
		// round them and then check
		round_price = round(new_price, level)
		check = abs(new_price - round_price) :<= 0.011
		
		// how much do these new prices match?
		conform = sum(check :* data[.,2])
		round_amount = conform / sum(data[.,2])
		//printf("ratio = %f \t round_amount = %f\n", ratio, round_amount)
		
		return(round_amount)
	}
end

mata	
	// function that finds the best ratio given some data
	function FindBestRatios(data, level, allow_one_point_five) {
		printf("Allow = %f\t", allow_one_point_five)
		best_amount = PercentRound(data, 1.0, level)
		best_ratio = 1.0
		
		second_best_amount = 0.0
		second_best_ratio = 0.0
		//printf("Starting with best_amount = %f \n", best_amount)
		
		for (iratio=1001; iratio <= 1515; iratio = iratio + 1) {
			ratio = round(iratio / 1000, .001)
			// skip 1.5 exactly if we don't think it's plausible
			//printf("ratio = %f\t", ratio)
			if ((round(ratio, .001) == 1.500) & (allow_one_point_five == 0)) {
				printf("Skip 1.5...\t")
				continue
			}
			//printf("Didn't skip.\n")
			test_amount = PercentRound(data, ratio, level)
			//printf("ratio = %f \t test = %f \t br = %f \t ba = %f \t 2br = %f \t 2ba = %f\n", ratio, test_amount,best_ratio, best_amount, second_best_ratio, second_best_amount)
			
			if (test_amount > best_amount) {
				// first, kick back the previous best to the second best, but only if there is some difference between what WOULD be the second best and what we just tested
				if (abs(ratio - best_ratio) > 0.002) {
					second_best_amount = best_amount
					second_best_ratio = best_ratio
				}
				// either way, replace the best thing
				best_amount = test_amount
				best_ratio = ratio
			} else if (test_amount > second_best_amount) {
				// kick it back only if the difference will be big enough
				if (abs(ratio - best_ratio) > 0.002) {
					second_best_amount = test_amount
					second_best_ratio = ratio
				}
				
			}
		}
		
		out = (best_ratio, best_amount, second_best_ratio, second_best_amount)
		
		return(out)
	}

end


// get all the data into mata
sort locid weekly panel price
mata
	all_data = st_data(., "panel price num_trans locid weekly")
end

// now we want to change the dataset to just be location and weekly
egen total_trans = sum(num_trans)
keep locid weekly panel total_trans tax_mult_*
bysort locid weekly: keep if _n == 1

// gen the new variables
gen best_ratio_quarters = .
gen best_amount_quarters = .
gen second_best_ratio_quarters = .
gen second_best_amount_quarters = .

// and the variables for the tax-rate only series
foreach v of varlist tax_mult_* {
	gen `v'_amt = .
}

gen allow_one_point_five = (abs(tax_mult_both_mult - 1.5) <= 0.01)

mata
	// now turn this into a panel
	info = panelsetup(all_data, 1)
	// how many panels are we dealing with?
	num_panels = rows(info)
	
	//panelsubview(panel_data = ., all_data, 1, info)
	//FindBestRatios(panel_data[., 2..3], 0.25, 0)
	
	// we need to get the tax multipliers in here too to search those
	tax_mults = st_data(., "tax_mult_none tax_mult_sales_only tax_mult_excise_only tax_mult_both_add tax_mult_both_mult")
	allow_one_point_five = st_data(., "allow_one_point_five")
	
	// make sure it's the same
	asserteq(num_panels, rows(tax_mults))
	
	// allocate space for output
	best_ratio_quarters = J(num_panels,1,.)
	best_amount_quarters = J(num_panels,1,.)
	second_best_ratio_quarters = J(num_panels,1,.)
	second_best_amount_quarters = J(num_panels,1,.)

	tax_mult_none_amt = J(num_panels,1,.)
	tax_mult_sales_only_amt = J(num_panels,1,.)
	tax_mult_excise_only_amt = J(num_panels,1,.)
	tax_mult_both_add_amt = J(num_panels,1,.)
	tax_mult_both_mult_amt = J(num_panels,1,.)
	
	
	// loop through the panels and find the thing
	for (i = 1; i <= num_panels; i++) {
		printf("Working on panel %f out of %f...\t",i, num_panels)
		panelsubview(panel_data = ., all_data, i, info)
		
		// only need to send the second couple of columns
		out = FindBestRatios(panel_data[., 2..3], 0.25, allow_one_point_five[i])
		
		best_ratio_quarters[i] = out[1]
		best_amount_quarters[i] = out[2]
		second_best_ratio_quarters[i] = out[3]
		second_best_amount_quarters[i] = out[4]
		
		// now get the stuff for the possible tax rates
		tax_mult_none_amt[i] = PercentRound(panel_data[., 2..3],tax_mults[i,1], 0.25)
		tax_mult_sales_only_amt[i] = PercentRound(panel_data[., 2..3],tax_mults[i,2], 0.25)
		tax_mult_excise_only_amt[i] = PercentRound(panel_data[., 2..3],tax_mults[i,3], 0.25)
		tax_mult_both_add_amt[i] = PercentRound(panel_data[., 2..3],tax_mults[i,4], 0.25)
		tax_mult_both_mult_amt[i] = PercentRound(panel_data[., 2..3],tax_mults[i,5], 0.25)
		
		printf("Got optimal_ratio = %f, round_amount = %f\n",best_ratio_quarters[i], best_amount_quarters[i])
	}
	
	// ok now we need to upload that data to stata
	st_store(., "best_ratio_quarters", best_ratio_quarters)
	st_store(., "best_amount_quarters", best_amount_quarters)
	st_store(., "second_best_ratio_quarters", second_best_ratio_quarters)
	st_store(., "second_best_amount_quarters", second_best_amount_quarters)
	
	// and the tax ones
	st_store(., "tax_mult_none_amt", tax_mult_none_amt)
	st_store(., "tax_mult_sales_only_amt", tax_mult_sales_only_amt)
	st_store(., "tax_mult_excise_only_amt", tax_mult_excise_only_amt)
	st_store(., "tax_mult_both_add_amt", tax_mult_both_add_amt)
	st_store(., "tax_mult_both_mult_amt", tax_mult_both_mult_amt)
	
end

* save the output
compress
saveold rounding_results_weekly_expand_kendall, replace

use rounding_results_weekly_expand_kendall, clear
rename locid location 
egen group = group(location)
sum group
local locationmax = r(max)
save rounding_results_weekly_expand_kendall2, replace
